In [226]:
### Loading Libraries...
import pandas as pd
import numpy as np

### Graphic libraries
import matplotlib.pyplot as plt
import seaborn as sns 

### Some Scikit-learn utils
from sklearn.model_selection import train_test_split

### Metrics
from sklearn import metrics
from sklearn.metrics import accuracy_score, roc_curve, auc

### Models
from xgboost import XGBClassifier, plot_importance

import itertools
from itertools import chain
from sklearn.feature_selection import RFE
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import GridSearchCV, cross_val_score, learning_curve, train_test_split
from sklearn.metrics import precision_score, recall_score, confusion_matrix, roc_curve, precision_recall_curve, accuracy_score

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import f1_score,accuracy_score,roc_auc_score

import lime
from lime.lime_tabular import LimeTabularExplainer

import eli5
from eli5.sklearn import PermutationImportance

import shap
from shap import TreeExplainer,KernelExplainer,LinearExplainer
shap.initjs()


### Some cosmetics add-ons
import warnings
warnings.filterwarnings('ignore')
plt.style.use('fivethirtyeight')
%matplotlib inline
In [2]:
conda install -c plotly plotly 
WARNING conda.base.context:use_only_tar_bz2(632): Conda is constrained to only using the old .tar.bz2 file format because you have conda-build installed, and it is <3.18.3.  Update or remove conda-build to get smaller downloads and faster extractions.
Collecting package metadata (repodata.json): done
Solving environment: done

# All requested packages already installed.


Note: you may need to restart the kernel to use updated packages.
In [3]:
import plotly.offline as py
py.init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.tools as tls
import plotly.figure_factory as ff
In [246]:
data = pd.read_csv('/Users/odianosenakhibi/Downloads/Thesis/data/data.csv')
In [5]:
pip install PDPbox
Requirement already satisfied: PDPbox in ./anaconda3/lib/python3.7/site-packages (0.2.0)
Requirement already satisfied: psutil in ./anaconda3/lib/python3.7/site-packages (from PDPbox) (5.6.1)
Requirement already satisfied: numpy in ./anaconda3/lib/python3.7/site-packages (from PDPbox) (1.16.2)
Requirement already satisfied: scipy in ./anaconda3/lib/python3.7/site-packages (from PDPbox) (1.2.1)
Requirement already satisfied: matplotlib>=2.1.2 in ./anaconda3/lib/python3.7/site-packages (from PDPbox) (3.0.3)
Requirement already satisfied: joblib in ./anaconda3/lib/python3.7/site-packages (from PDPbox) (0.13.2)
Requirement already satisfied: scikit-learn in ./anaconda3/lib/python3.7/site-packages (from PDPbox) (0.20.3)
Requirement already satisfied: pandas in ./anaconda3/lib/python3.7/site-packages (from PDPbox) (0.24.2)
Requirement already satisfied: cycler>=0.10 in ./anaconda3/lib/python3.7/site-packages (from matplotlib>=2.1.2->PDPbox) (0.10.0)
Requirement already satisfied: kiwisolver>=1.0.1 in ./anaconda3/lib/python3.7/site-packages (from matplotlib>=2.1.2->PDPbox) (1.0.1)
Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in ./anaconda3/lib/python3.7/site-packages (from matplotlib>=2.1.2->PDPbox) (2.3.1)
Requirement already satisfied: python-dateutil>=2.1 in ./anaconda3/lib/python3.7/site-packages (from matplotlib>=2.1.2->PDPbox) (2.8.0)
Requirement already satisfied: pytz>=2011k in ./anaconda3/lib/python3.7/site-packages (from pandas->PDPbox) (2018.9)
Requirement already satisfied: six in ./anaconda3/lib/python3.7/site-packages (from cycler>=0.10->matplotlib>=2.1.2->PDPbox) (1.12.0)
Requirement already satisfied: setuptools in ./anaconda3/lib/python3.7/site-packages (from kiwisolver>=1.0.1->matplotlib>=2.1.2->PDPbox) (40.8.0)
Note: you may need to restart the kernel to use updated packages.
In [247]:
np.random.seed(123) #ensure reproducibility

pd.options.mode.chained_assignment = None  #hide any pandas warnings
In [248]:
from pdpbox import pdp, info_plots #for partial plots
In [249]:
import seaborn as sns
In [250]:
from xgboost import XGBClassifier, plot_importance
In [251]:
from sklearn import metrics
from sklearn.metrics import accuracy_score, roc_curve, auc
In [252]:
data.describe()
Out[252]:
id radius_mean texture_mean perimeter_mean area_mean smoothness_mean compactness_mean concavity_mean concave points_mean symmetry_mean ... texture_worst perimeter_worst area_worst smoothness_worst compactness_worst concavity_worst concave points_worst symmetry_worst fractal_dimension_worst Unnamed: 32
count 5.690000e+02 569.000000 569.000000 569.000000 569.000000 569.000000 569.000000 569.000000 569.000000 569.000000 ... 569.000000 569.000000 569.000000 569.000000 569.000000 569.000000 569.000000 569.000000 569.000000 0.0
mean 3.037183e+07 14.127292 19.289649 91.969033 654.889104 0.096360 0.104341 0.088799 0.048919 0.181162 ... 25.677223 107.261213 880.583128 0.132369 0.254265 0.272188 0.114606 0.290076 0.083946 NaN
std 1.250206e+08 3.524049 4.301036 24.298981 351.914129 0.014064 0.052813 0.079720 0.038803 0.027414 ... 6.146258 33.602542 569.356993 0.022832 0.157336 0.208624 0.065732 0.061867 0.018061 NaN
min 8.670000e+03 6.981000 9.710000 43.790000 143.500000 0.052630 0.019380 0.000000 0.000000 0.106000 ... 12.020000 50.410000 185.200000 0.071170 0.027290 0.000000 0.000000 0.156500 0.055040 NaN
25% 8.692180e+05 11.700000 16.170000 75.170000 420.300000 0.086370 0.064920 0.029560 0.020310 0.161900 ... 21.080000 84.110000 515.300000 0.116600 0.147200 0.114500 0.064930 0.250400 0.071460 NaN
50% 9.060240e+05 13.370000 18.840000 86.240000 551.100000 0.095870 0.092630 0.061540 0.033500 0.179200 ... 25.410000 97.660000 686.500000 0.131300 0.211900 0.226700 0.099930 0.282200 0.080040 NaN
75% 8.813129e+06 15.780000 21.800000 104.100000 782.700000 0.105300 0.130400 0.130700 0.074000 0.195700 ... 29.720000 125.400000 1084.000000 0.146000 0.339100 0.382900 0.161400 0.317900 0.092080 NaN
max 9.113205e+08 28.110000 39.280000 188.500000 2501.000000 0.163400 0.345400 0.426800 0.201200 0.304000 ... 49.540000 251.200000 4254.000000 0.222600 1.058000 1.252000 0.291000 0.663800 0.207500 NaN

8 rows × 32 columns

In [253]:
# removing the last column as it is empty

data = data.drop(['id', 'Unnamed: 32'], axis = 1)

print(data.columns)
Index(['diagnosis', 'radius_mean', 'texture_mean', 'perimeter_mean',
       'area_mean', 'smoothness_mean', 'compactness_mean', 'concavity_mean',
       'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',
       'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',
       'compactness_se', 'concavity_se', 'concave points_se', 'symmetry_se',
       'fractal_dimension_se', 'radius_worst', 'texture_worst',
       'perimeter_worst', 'area_worst', 'smoothness_worst',
       'compactness_worst', 'concavity_worst', 'concave points_worst',
       'symmetry_worst', 'fractal_dimension_worst'],
      dtype='object')
In [254]:
data.describe()
Out[254]:
radius_mean texture_mean perimeter_mean area_mean smoothness_mean compactness_mean concavity_mean concave points_mean symmetry_mean fractal_dimension_mean ... radius_worst texture_worst perimeter_worst area_worst smoothness_worst compactness_worst concavity_worst concave points_worst symmetry_worst fractal_dimension_worst
count 569.000000 569.000000 569.000000 569.000000 569.000000 569.000000 569.000000 569.000000 569.000000 569.000000 ... 569.000000 569.000000 569.000000 569.000000 569.000000 569.000000 569.000000 569.000000 569.000000 569.000000
mean 14.127292 19.289649 91.969033 654.889104 0.096360 0.104341 0.088799 0.048919 0.181162 0.062798 ... 16.269190 25.677223 107.261213 880.583128 0.132369 0.254265 0.272188 0.114606 0.290076 0.083946
std 3.524049 4.301036 24.298981 351.914129 0.014064 0.052813 0.079720 0.038803 0.027414 0.007060 ... 4.833242 6.146258 33.602542 569.356993 0.022832 0.157336 0.208624 0.065732 0.061867 0.018061
min 6.981000 9.710000 43.790000 143.500000 0.052630 0.019380 0.000000 0.000000 0.106000 0.049960 ... 7.930000 12.020000 50.410000 185.200000 0.071170 0.027290 0.000000 0.000000 0.156500 0.055040
25% 11.700000 16.170000 75.170000 420.300000 0.086370 0.064920 0.029560 0.020310 0.161900 0.057700 ... 13.010000 21.080000 84.110000 515.300000 0.116600 0.147200 0.114500 0.064930 0.250400 0.071460
50% 13.370000 18.840000 86.240000 551.100000 0.095870 0.092630 0.061540 0.033500 0.179200 0.061540 ... 14.970000 25.410000 97.660000 686.500000 0.131300 0.211900 0.226700 0.099930 0.282200 0.080040
75% 15.780000 21.800000 104.100000 782.700000 0.105300 0.130400 0.130700 0.074000 0.195700 0.066120 ... 18.790000 29.720000 125.400000 1084.000000 0.146000 0.339100 0.382900 0.161400 0.317900 0.092080
max 28.110000 39.280000 188.500000 2501.000000 0.163400 0.345400 0.426800 0.201200 0.304000 0.097440 ... 36.040000 49.540000 251.200000 4254.000000 0.222600 1.058000 1.252000 0.291000 0.663800 0.207500

8 rows × 30 columns

In [255]:
data.isna().sum().sum()
Out[255]:
0
In [256]:
# Reassign target
data.diagnosis.replace(to_replace = dict(M = 1, B = 0), inplace = True)
In [257]:
# 2 datasets
M = data[(data['diagnosis'] != 0)]
B = data[(data['diagnosis'] == 0)]
In [258]:
plt.hist(data['diagnosis'])
plt.title('Diagnosis (M=1 , B=0)')
plt.show()
In [259]:
features_mean=list(data.columns[1:11])
# split dataframe into two based on diagnosis
dataM=data[data['diagnosis'] ==1]
dataB=data[data['diagnosis'] ==0]
In [260]:
#Stack the data
plt.rcParams.update({'font.size': 8})
fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(8,10))
axes = axes.ravel()
for idx,ax in enumerate(axes):
    ax.figure
    binwidth= (max(data[features_mean[idx]]) - min(data[features_mean[idx]]))/50
    ax.hist([dataM[features_mean[idx]],dataB[features_mean[idx]]], bins=np.arange(min(data[features_mean[idx]]), max(data[features_mean[idx]]) + binwidth, binwidth) , alpha=0.5,stacked=True, normed = True, label=['M','B'],color=['r','g'])
    ax.legend(loc='upper right')
    ax.set_title(features_mean[idx])
plt.tight_layout()
plt.show()
In [261]:
target = 'diagnosis'
features_list = list(data.columns)
features_list.remove(target)
In [262]:
#correlation
correlation = data.corr()
#tick labels
matrix_cols = correlation.columns.tolist()
#convert to array
corr_array  = np.array(correlation)
In [263]:
# for visualizing correlations
#Plotting
trace = go.Heatmap(z = corr_array,
                   x = matrix_cols,
                   y = matrix_cols,
                   xgap = 2,
                   ygap = 2,
                   colorscale='Viridis',
                   colorbar   = dict() ,
                  )
layout = go.Layout(dict(title = 'Correlation Matrix for variables',
                        autosize = False,
                        height  = 720,
                        width   = 800,
                        margin  = dict(r = 0 ,l = 210,
                                       t = 25,b = 210,
                                     ),
                        yaxis   = dict(tickfont = dict(size = 9)),
                        xaxis   = dict(tickfont = dict(size = 9)),
                       )
                  )
fig = go.Figure(data = [trace],layout = layout)
py.iplot(fig)
In [265]:
cat_cols = list(data.select_dtypes('object').columns)
class_dict = {}
for col in cat_cols:
    data = pd.concat([data.drop(col, axis=1), pd.get_dummies(data[col])], axis=1)
data.head()
Out[265]:
diagnosis radius_mean texture_mean perimeter_mean area_mean smoothness_mean compactness_mean concavity_mean concave points_mean symmetry_mean ... radius_worst texture_worst perimeter_worst area_worst smoothness_worst compactness_worst concavity_worst concave points_worst symmetry_worst fractal_dimension_worst
0 1 17.99 10.38 122.80 1001.0 0.11840 0.27760 0.3001 0.14710 0.2419 ... 25.38 17.33 184.60 2019.0 0.1622 0.6656 0.7119 0.2654 0.4601 0.11890
1 1 20.57 17.77 132.90 1326.0 0.08474 0.07864 0.0869 0.07017 0.1812 ... 24.99 23.41 158.80 1956.0 0.1238 0.1866 0.2416 0.1860 0.2750 0.08902
2 1 19.69 21.25 130.00 1203.0 0.10960 0.15990 0.1974 0.12790 0.2069 ... 23.57 25.53 152.50 1709.0 0.1444 0.4245 0.4504 0.2430 0.3613 0.08758
3 1 11.42 20.38 77.58 386.1 0.14250 0.28390 0.2414 0.10520 0.2597 ... 14.91 26.50 98.87 567.7 0.2098 0.8663 0.6869 0.2575 0.6638 0.17300
4 1 20.29 14.34 135.10 1297.0 0.10030 0.13280 0.1980 0.10430 0.1809 ... 22.54 16.67 152.20 1575.0 0.1374 0.2050 0.4000 0.1625 0.2364 0.07678

5 rows × 31 columns

In [266]:
# building train/test datasets on a 70/30 ratio
X_train, X_test, y_train, y_test = train_test_split(data, y, test_size=0.3, random_state=42)
X_train.shape, X_test.shape
Out[266]:
((398, 31), (171, 31))
In [275]:
ML_models = {}
model_index = ['LR','RF','NN']
model_sklearn = [LogisticRegression(solver='liblinear',random_state=0),
                 RandomForestClassifier(n_estimators=100,random_state=0),
                 MLPClassifier([100]*5,early_stopping=True,learning_rate='adaptive',random_state=0)]
model_summary = []
for name,model in zip(model_index,model_sklearn):
    ML_models[name] = model.fit(X_train,y_train)
    preds = model.predict(X_test)
    model_summary.append([name,f1_score(y_test,preds,average='weighted'),accuracy_score(y_test,preds),
                          roc_auc_score(y_test,model.predict_proba(X_test)[:,1])])
ML_models
Out[275]:
{'LR': LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
           intercept_scaling=1, max_iter=100, multi_class='warn',
           n_jobs=None, penalty='l2', random_state=0, solver='liblinear',
           tol=0.0001, verbose=0, warm_start=False),
 'RF': RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
             max_depth=None, max_features='auto', max_leaf_nodes=None,
             min_impurity_decrease=0.0, min_impurity_split=None,
             min_samples_leaf=1, min_samples_split=2,
             min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=None,
             oob_score=False, random_state=0, verbose=0, warm_start=False),
 'NN': MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
        beta_2=0.999, early_stopping=True, epsilon=1e-08,
        hidden_layer_sizes=[100, 100, 100, 100, 100],
        learning_rate='adaptive', learning_rate_init=0.001, max_iter=200,
        momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
        power_t=0.5, random_state=0, shuffle=True, solver='adam',
        tol=0.0001, validation_fraction=0.1, verbose=False,
        warm_start=False)}
In [276]:
model_summary = pd.DataFrame(model_summary,columns=['Name','F1_score','Accuracy','AUC_ROC'])
model_summary = model_summary.reset_index()
display(model_summary)
index Name F1_score Accuracy AUC_ROC
0 0 LR 1.000000 1.000000 1.000000
1 1 RF 1.000000 1.000000 1.000000
2 2 NN 0.927978 0.929825 0.990741
In [269]:
g=sns.regplot(data=model_summary, x="index", y="AUC_ROC", fit_reg=False,
               marker="o", color="black", scatter_kws={'s':500})
 
for i in range(0,model_summary.shape[0]):
     g.text(model_summary.loc[i,'index'], model_summary.loc[i,'AUC_ROC']+0.02, 
            model_summary.loc[i,'Name'], 
            horizontalalignment='center',verticalalignment='top', size='large', color='black')
In [278]:
#initialization of a explainer from LIME
explainer = LimeTabularExplainer(X_train.values,
                                 mode='classification',
                                 feature_names=X_train.columns,
                                 class_names=['Malign','Benign'])
In [279]:
exp = explainer.explain_instance(X_test.head(1).values[0],
                                 ML_models['LR'].predict_proba,
                                 num_features=X_train.shape[1])
exp.show_in_notebook(show_table=True, show_all=True)
In [280]:
exp = explainer.explain_instance(X_test.head(1).values[0],
                                 ML_models['RF'].predict_proba,
                                 num_features=X_train.shape[1])
exp.show_in_notebook(show_table=True, show_all=False)
In [281]:
exp = explainer.explain_instance(X_test.head(1).values[0],
                                 ML_models['NN'].predict_proba,
                                 num_features=X_train.shape[1])
exp.show_in_notebook(show_table=True, show_all=False)
In [231]:
#Eli5
eli5.show_weights(ML_models['LR'], feature_names = list(X_test.columns))
Out[231]:

y=1 top features

Weight? Feature
+4.248 diagnosis
+0.696 concavity_worst
+0.534 compactness_worst
+0.277 concavity_mean
+0.243 concave points_worst
+0.240 symmetry_worst
+0.213 texture_worst
+0.187 compactness_mean
+0.135 concave points_mean
+0.095 perimeter_worst
+0.079 smoothness_worst
+0.069 symmetry_mean
+0.065 area_se
+0.047 fractal_dimension_worst
… 10 more positive …
… 2 more negative …
-0.064 perimeter_mean
-0.067 texture_mean
-0.200 <BIAS>
-0.579 texture_se
-0.838 radius_worst
-1.042 radius_mean
In [277]:
eli5.show_prediction(ML_models['LR'], X_test.head(1).values[0],feature_names=list(X_test.columns))
Out[277]:

y=0 (probability 0.955, score -3.065) top features

Contribution? Feature
+12.996 radius_mean
+12.538 radius_worst
+5.191 perimeter_mean
+1.246 texture_mean
+0.604 texture_se
+0.200 <BIAS>
+0.115 perimeter_se
+0.002 radius_se
-0.000 fractal_dimension_se
-0.000 smoothness_se
-0.000 concave points_se
-0.000 symmetry_se
-0.001 compactness_se
-0.001 fractal_dimension_mean
-0.001 concavity_se
-0.004 fractal_dimension_worst
-0.004 smoothness_mean
-0.005 concave points_mean
-0.011 smoothness_worst
-0.013 symmetry_mean
-0.020 compactness_mean
-0.022 concavity_mean
-0.025 concave points_worst
-0.072 symmetry_worst
-0.127 compactness_worst
-0.186 concavity_worst
-1.978 area_se
-2.678 area_mean
-5.252 texture_worst
-9.130 perimeter_worst
-10.297 area_worst
In [233]:
exp = PermutationImportance(ML_models['LR'],
                            random_state = 0).fit(X_test, y_test)
eli5.show_weights(exp,feature_names=list(X_test.columns))
Out[233]:
Weight Feature
0.4292 ± 0.0585 area_worst
0.1357 ± 0.0136 perimeter_worst
0.0936 ± 0.0277 diagnosis
0.0713 ± 0.0136 area_se
0.0503 ± 0.0119 area_mean
0.0456 ± 0.0227 radius_mean
0.0374 ± 0.0204 radius_worst
0.0082 ± 0.0057 texture_worst
0.0035 ± 0.0094 perimeter_mean
0 ± 0.0000 smoothness_mean
0 ± 0.0000 compactness_mean
0 ± 0.0000 compactness_se
0 ± 0.0000 concave points_mean
0 ± 0.0000 symmetry_mean
0 ± 0.0000 fractal_dimension_mean
0 ± 0.0000 radius_se
0 ± 0.0000 texture_se
0 ± 0.0000 fractal_dimension_worst
0 ± 0.0000 texture_mean
0 ± 0.0000 concavity_mean
… 11 more …
In [234]:
eli5.show_weights(ML_models['RF'],feature_names=list(X_test.columns))
Out[234]:
Weight Feature
0.2328 ± 0.6890 diagnosis
0.1247 ± 0.5062 concave points_worst
0.0972 ± 0.4450 radius_worst
0.0810 ± 0.4192 perimeter_worst
0.0799 ± 0.3877 concave points_mean
0.0657 ± 0.3243 area_worst
0.0587 ± 0.3254 concavity_mean
0.0451 ± 0.2917 radius_mean
0.0374 ± 0.2394 concavity_worst
0.0273 ± 0.2078 perimeter_mean
0.0253 ± 0.1916 area_se
0.0216 ± 0.1863 area_mean
0.0133 ± 0.0647 symmetry_worst
0.0127 ± 0.0511 texture_mean
0.0120 ± 0.0974 compactness_worst
0.0119 ± 0.1296 perimeter_se
0.0092 ± 0.0373 texture_worst
0.0089 ± 0.0901 radius_se
0.0057 ± 0.0306 smoothness_worst
0.0042 ± 0.0428 concave points_se
… 11 more …
In [235]:
eli5.show_prediction(ML_models['RF'], X_test.head(1).values[0],feature_names=list(X_test.columns))
Out[235]:

y=0 (probability 0.970) top features

Contribution? Feature
+0.623 <BIAS>
+0.079 diagnosis
+0.056 concave points_worst
+0.048 perimeter_worst
+0.041 concave points_mean
+0.040 radius_worst
+0.027 area_worst
+0.019 radius_mean
+0.014 area_se
+0.013 concavity_mean
+0.008 perimeter_se
+0.006 compactness_worst
+0.005 symmetry_worst
+0.003 texture_worst
+0.003 concave points_se
+0.003 radius_se
+0.002 texture_mean
+0.002 fractal_dimension_worst
+0.001 texture_se
+0.001 compactness_mean
+0.001 symmetry_se
+0.001 perimeter_mean
+0.001 smoothness_se
+0.001 fractal_dimension_mean
+0.001 area_mean
+0.001 symmetry_mean
+0.000 concavity_se
-0.002 smoothness_worst
-0.009 smoothness_mean
-0.019 concavity_worst
In [236]:
exp = PermutationImportance(ML_models['RF'],
                            random_state = 0).fit(X_test, y_test)
eli5.show_weights(exp,feature_names=list(X_test.columns))
Out[236]:
Weight Feature
0.2842 ± 0.0500 diagnosis
0 ± 0.0000 perimeter_mean
0 ± 0.0000 area_mean
0 ± 0.0000 smoothness_mean
0 ± 0.0000 compactness_mean
0 ± 0.0000 concavity_mean
0 ± 0.0000 concave points_mean
0 ± 0.0000 symmetry_mean
0 ± 0.0000 fractal_dimension_mean
0 ± 0.0000 smoothness_se
0 ± 0.0000 texture_se
0 ± 0.0000 perimeter_se
0 ± 0.0000 area_se
0 ± 0.0000 radius_mean
0 ± 0.0000 fractal_dimension_worst
0 ± 0.0000 concavity_se
0 ± 0.0000 compactness_worst
0 ± 0.0000 smoothness_worst
0 ± 0.0000 texture_mean
0 ± 0.0000 radius_se
… 11 more …
In [237]:
eli5.show_weights(ML_models['NN'])
Out[237]:
Error: estimator MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9, beta_2=0.999, early_stopping=True, epsilon=1e-08, hidden_layer_sizes=[100, 100, 100, 100, 100], learning_rate='adaptive', learning_rate_init=0.001, max_iter=200, momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5, random_state=0, shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1, verbose=False, warm_start=False) is not supported
In [238]:
#SHAP
explainer = LinearExplainer(ML_models['LR'], X_train, feature_dependence="independent")
shap_values = explainer.shap_values(X_test.head(1).values)
shap.force_plot(explainer.expected_value,
                shap_values,
                X_test.head(1).values,
                feature_names=X_test.columns)
Out[238]:
Visualization omitted, Javascript library not loaded!
Have you run `initjs()` in this notebook? If this notebook was from another user you must also trust this notebook (File -> Trust notebook). If you are viewing this notebook on github the Javascript has been stripped for security. If you are using JupyterLab this error is because a JupyterLab extension has not yet been written.
In [239]:
shap_values = explainer.shap_values(X_test.head(250).values)
shap.force_plot(explainer.expected_value,
                shap_values,
                X_test.head(250).values,
                feature_names=X_test.columns)
Out[239]:
Visualization omitted, Javascript library not loaded!
Have you run `initjs()` in this notebook? If this notebook was from another user you must also trust this notebook (File -> Trust notebook). If you are viewing this notebook on github the Javascript has been stripped for security. If you are using JupyterLab this error is because a JupyterLab extension has not yet been written.
In [240]:
shap_values = explainer.shap_values(X_test.values)
spplot = shap.summary_plot(shap_values, X_test.values, feature_names=X_test.columns)
In [242]:
top4_cols = ['area_worst','radius_worst','radius_mean','perimeter_worst']
for col in top4_cols:
    shap.dependence_plot(col, shap_values, X_test)
In [243]:
explainer = TreeExplainer(ML_models['RF'])
shap_values = explainer.shap_values(X_test.head(1).values)
shap.force_plot(explainer.expected_value[1],
                shap_values[1],
                X_test.head(1).values,
                feature_names=X_test.columns)
Out[243]:
Visualization omitted, Javascript library not loaded!
Have you run `initjs()` in this notebook? If this notebook was from another user you must also trust this notebook (File -> Trust notebook). If you are viewing this notebook on github the Javascript has been stripped for security. If you are using JupyterLab this error is because a JupyterLab extension has not yet been written.
In [245]:
X_train_kmeans = shap.kmeans(X_train, 10)
explainer = KernelExplainer(ML_models['NN'].predict_proba,X_train_kmeans)
shap_values = explainer.shap_values(X_test.head(1).values)
shap.force_plot(explainer.expected_value[1],
                shap_values[1],
                X_test.head(1).values,
                feature_names=X_test.columns)
100%|██████████| 1/1 [00:00<00:00,  2.66it/s]
Out[245]:
Visualization omitted, Javascript library not loaded!
Have you run `initjs()` in this notebook? If this notebook was from another user you must also trust this notebook (File -> Trust notebook). If you are viewing this notebook on github the Javascript has been stripped for security. If you are using JupyterLab this error is because a JupyterLab extension has not yet been written.
In [ ]: